---
title: "Pilot Survey Data Analysis"
output: html_document
date: "2025-07-21"
---


## Set up - packages and functions
```{r}

# Remove every object returned by ls() from the current environment before
# the analysis starts. This does not detach packages or reset options.
# NOTE(review): when the chunk is run interactively this also wipes the
# caller's workspace; consider dropping it and relying on a fresh session.
rm(list = ls())

library(dplyr)
library(foreign)
library(haven)
library(tidyverse)
library(broom.mixed)
library(kableExtra)
library(gridExtra)
library(jtools)
library(tables)
library(Hmisc)
library(scales)
library(table1)
library(stargazer)
library(gtsummary)
library(cardx)

# Probability of superiority function
#
# Standardises the coefficient `var` of `model` by the standard deviation of
# `data[[outcome]]` (a Cohen's d-style effect size) and maps it to a
# probability with pnorm(d / sqrt(2)).
#
# model   : fitted model object that coef() can be applied to
# var     : name of the coefficient to convert
# data    : data frame holding the outcome column
# outcome : name of the outcome column in `data`
#
# Returns a length-one numeric vector named after `var`.
get_ps <- function(model, var, data, outcome) {
  effect_size <- coef(model)[var] / sd(data[[outcome]], na.rm = TRUE)
  pnorm(effect_size / sqrt(2))
}

# Shared ggplot2 theme additions for the figures: 16 pt text and y-axis
# labels, 20 pt plot titles
larger_text_theme <- theme(
  plot.title = element_text(size = 20),
  axis.text.y = element_text(size = 16),
  text = element_text(size = 16)
)



```

# Cleaning/Recoding Survey Data
```{r}

# Load the de-identified pilot survey; `knowledge` is forced to character
pilot <- read_csv(
  "pilot_nopii.csv",
  col_types = cols(knowledge = col_character())
)

# Create party scale variable (1-7) from the three follow-up items.
# case_when() returns the first match, so the rules are listed from highest
# to lowest precedence: a respondent matching several rules receives the
# value of the rule listed first.
pilot <- pilot %>%
  mutate(
    party_7 = case_when(
      republican == 1 ~ 7,
      republican == 2 ~ 6,
      independent == 1 ~ 5,
      independent == 2 ~ 4,
      independent == 3 ~ 3,
      democrat == 2 ~ 2,
      democrat == 1 ~ 1
    ),
    # Respondents without a party score receive the sample mean
    party_7 = ifelse(is.na(party_7), mean(party_7, na.rm = TRUE), party_7)
  )


# Make "don't know" responses (coded 6) on the record items NA
pilot <- pilot %>%
  mutate(across(record1:record4, ~ ifelse(.x == 6, NA, .x)))

# Make -99's (empty responses) into NA in every column
pilot <- pilot %>%
  mutate(across(everything(), ~ ifelse(.x == "-99", NA, .x)))

# Recode the mixed-up confidence scale values.
# conf1: 5 -> 4, 6 -> 5, every other value unchanged.
pilot$conf1 <- ifelse(pilot$conf1 == 5, 4,
                      ifelse(pilot$conf1 == 6, 5, pilot$conf1))
# conf2: 5 -> 4, 4 -> 5, every other value unchanged. The fallback must be
# conf2 itself; falling back to conf1 would overwrite all other conf2
# responses with the respondent's conf1 answer.
# NOTE(review): conf1 remaps 6 -> 5 whereas conf2 remaps 4 -> 5; confirm the
# conf2 mapping against the survey codebook.
pilot$conf2 <- ifelse(pilot$conf2 == 5, 4,
                      ifelse(pilot$conf2 == 4, 5, pilot$conf2))

# Binary turnout indicator: "2" becomes 0, any other non-missing value 1
pilot$vote_2020 <- ifelse(pilot$vote_2020 == "2", 0, 1)

# Text labels indexed by the numeric race code
race_labels <- c(
  "White",
  "Hispanic or Latino",
  "Black or African American",
  "American Indian or Alaska Native",
  "Asian",
  "Native Hawaiian or Pacific Islander",
  "Middle Eastern",
  "Multiracial",
  "Other"
)

# Text labels indexed by the numeric 2020 vote-choice code
vote_labels <- c("Biden", "Trump", "Third Party", "Didn't Vote")

# Add the labelled 'Race' column and replace the vote-choice codes with labels
pilot <- pilot %>%
  mutate(
    Race = race_labels[race],
    vote_choice_2020 = vote_labels[vote_choice_2020]
  )

# Recode outcomes so that greater values mean more confidence, greater
# accessibility, more concerns, greater denialism etc.
# Every listed item is replaced by abs(item - 6).
reverse_items <- c(
  paste0("record", 1:4),
  paste0("conf", 1:5),
  paste0("concern", 1:6),
  paste0("concern", 1:6, "_out"),
  "denial1",
  paste0("denialism", 2:4)
)

pilot <- pilot %>%
  mutate(across(all_of(reverse_items), ~ abs(.x - 6)))

# Create additive scales: each scale is the mean of its items, standardised
# with scale() (centred and divided by its standard deviation). scale()
# returns a one-column matrix, which is stored as the column.
pilot <- pilot %>%
  mutate(
    denial = scale((denial1 + denialism2 + denialism3 + denialism4) / 4),
    record = scale((record1 + record2 + record3 + record4) / 4),
    # Four items are summed, so the mean divides by 4 (the divisor was 5).
    # After standardisation the scale values are the same either way.
    # NOTE(review): conf5 is reverse-coded during cleaning but is not part
    # of this scale; confirm that leaving it out is intended.
    conf = scale((conf1 + conf2 + conf3 + conf4) / 4),
    conf_local = scale((conf1 + conf2 + conf3) / 3),
    concern = scale(
      (concern1 + concern2 + concern3 + concern4 + concern5 + concern6) / 6
    ),
    concern_out = scale(
      (concern1_out + concern2_out + concern3_out + concern4_out +
         concern5_out + concern6_out) / 6
    )
  )

# Experimental condition as a factor with "control" as the reference group,
# a pooled indicator ('audit' and 'pr' -> "treatment", everything else ->
# "control"), and display-friendly copies of covariates for the tables
pilot <- pilot %>%
  mutate(
    condition = relevel(as.factor(treatment), ref = "control"),
    condition_pooled = ifelse(
      condition %in% c("audit", "pr"), "treatment", "control"
    ),
    # Additional Labels
    `Party (7 point scale)` = party_7,
    Education = educ,
    `Voted 2020` = vote_2020,
    `Vote Choice 2024` = vote_choice_2020,
    TreatmentName = condition
  )

# Human-readable names for the condition levels, in the factor's level order
levels(pilot$TreatmentName) <- c(
  "Control", "Online Search Tool", "Public Records", "GOTV"
)

```

### Balance tests ###
```{r}

covariates <- c("educ", "denial", "vote_2020", "party_7", "Age")

# Summarise and test balance for one covariate.
#
# data      : data frame with a `condition` column and the covariate
# covariate : covariate name as a character string
#
# Returns one row per condition with the covariate mean (`mean`) and a
# `p_value` column repeated on every row. The p-value comes from a two-sample
# t.test() between the first two values of unique(data$condition).
# NOTE(review): only two arms are compared, and which two depends on row
# order; an omnibus test across all arms may be what is intended.
balance_test <- function(data, covariate) {
  arm_means <- data %>%
    group_by(condition) %>%
    summarise(mean = mean(.data[[covariate]], na.rm = TRUE))

  arms <- unique(data$condition)
  # which() drops rows with a missing condition, as filter() would
  first_arm <- data[[covariate]][which(data$condition == arms[1])]
  second_arm <- data[[covariate]][which(data$condition == arms[2])]
  p_value <- t.test(first_arm, second_arm)$p.value

  bind_cols(arm_means, p_value = p_value)
}

# Apply the function to each covariate; naming the list makes the `Variable`
# id column hold covariate names instead of list positions
results <- lapply(covariates, function(x) balance_test(pilot, x))
names(results) <- covariates

# Combine results into a single data frame
results_df <- bind_rows(results, .id = "Variable")

print(results_df)


```

### Balance tests: categorical covariates ###
```{r}
# State by condition, cell percentages, with add_p()'s default test
tbl_cross(pilot, row = state, col = condition, percent = "cell") %>%
  add_p()

# 2020 vote choice by condition, cell percentages, with add_p()'s default test
tbl_cross(pilot, row = vote_choice_2020, col = condition, percent = "cell") %>%
  add_p()

# Use fisher.test for gender because of small counts; the larger workspace
# is passed through to the test
tbl_cross(pilot, row = Sex, col = condition, percent = "cell") %>%
  add_p(test = "fisher.test", test.args = list(workspace = 2e8))

```

## Manipulation Check
```{r}

# Manipulation check for each of the 4 experimental conditions: among
# responses coded 1-3, the share that chose the correct answer

# Search Tool (also called 'audit'); correct: Voting records are public (2)
pilot %>%
  filter(mc_audit_all %in% c("1", "2", "3")) %>%
  count(mc_audit_all) %>%
  mutate(share = n / sum(n)) %>%
  filter(mc_audit_all == "2") %>%
  pull(share)

# Control; correct: Recycling (1)
pilot %>%
  filter(mc_control_all %in% c("1", "2", "3")) %>%
  count(mc_control_all) %>%
  mutate(share = n / sum(n)) %>%
  filter(mc_control_all == "1") %>%
  pull(share)

# PR; correct: Voting records are public (2)
pilot %>%
  filter(mc_pr_all %in% c("1", "2", "3")) %>%
  count(mc_pr_all) %>%
  mutate(share = n / sum(n)) %>%
  filter(mc_pr_all == "2") %>%
  pull(share)

# GOTV; correct: Voting (1)
pilot %>%
  filter(mc_vote_all %in% c("1", "2", "3")) %>%
  count(mc_vote_all) %>%
  mutate(share = n / sum(n)) %>%
  filter(mc_vote_all == "1") %>%
  pull(share)

```


# MANUSCRIPT: TABLES and FIGURES

# Table 2 : Effect of Experimental Conditions on Election Attitudes (Compared to Pure Control)
```{r, warning= FALSE}

# Analysis sample: every arm except GOTV ("voting"), with the pure control
# group as the reference category
pilot2 <- subset(pilot, condition != "voting")
pilot2$condition <- relevel(pilot2$condition, ref = "control")

# One OLS model per outcome: condition indicators, denialism, state dummies
m1 <- lm(record ~ condition + denial + as.factor(state), data = pilot2)
m2 <- lm(conf ~ condition + denial + as.factor(state), data = pilot2)
m3 <- lm(concern ~ condition + denial + as.factor(state), data = pilot2)

stargazer(
  list(m1, m2, m3),
  title = "Effect of Experimental Conditions on Election Attitudes (Compared to Pure Control)",
  dep.var.caption = "Election Attitudes",
  column.labels = c("Record Access", "Election Confidence", "Issues within State"),
  covariate.labels = c("Online Search Tool", "Public Records", "Denialism"),
  omit = c("state"),
  omit.labels = c("State fixed effects"),
  omit.stat = c("rsq", "f"),
  digits = 2,
  df = FALSE,
  align = TRUE,
  no.space = TRUE
)

# Probability of superiority for each treatment coefficient
# (audit = Online Search Tool, pr = Public Records)
ps_m1 <- get_ps(m1, "conditionaudit", pilot2, "record")
ps_m1a <- get_ps(m1, "conditionpr", pilot2, "record")
ps_m2 <- get_ps(m2, "conditionaudit", pilot2, "conf")
ps_m2a <- get_ps(m2, "conditionpr", pilot2, "conf")
ps_m3 <- get_ps(m3, "conditionaudit", pilot2, "concern")
ps_m3b <- get_ps(m3, "conditionpr", pilot2, "concern")

ps_results <- c(ps_m1, ps_m1a, ps_m2, ps_m2a, ps_m3, ps_m3b)
print(ps_results)


```

# Table 3: Effect of Experimental Conditions on Election Attitudes
```{r, warning = FALSE}


# --- Online Search Tool vs Public Records ---
# Keep only the audit and pr arms; Public Records is the reference category
pilot2 <- subset(pilot, condition != "voting" & condition != "control")
pilot2$condition <- relevel(pilot2$condition, ref = "pr")

m4 <- lm(record ~ condition + denial + as.factor(state), data = pilot2)
m5 <- lm(conf ~ condition + denial + as.factor(state), data = pilot2)

# Probability of superiority
ps_m4 <- get_ps(m4, "conditionaudit", pilot2, "record")
ps_m5 <- get_ps(m5, "conditionaudit", pilot2, "conf")

ps_results <- c(ps_m4, ps_m5)
print(ps_results)


# --- Online Search Tool vs GOTV ---
# Keep only the audit and voting arms; GOTV is the reference category
pilot2 <- subset(pilot, condition != "control" & condition != "pr")
pilot2$condition <- relevel(pilot2$condition, ref = "voting")

m6 <- lm(record ~ condition + denial + as.factor(state), data = pilot2)
m7 <- lm(conf ~ condition + denial + as.factor(state), data = pilot2)

# Probability of superiority
ps_m6 <- get_ps(m6, "conditionaudit", pilot2, "record")
ps_m7 <- get_ps(m7, "conditionaudit", pilot2, "conf")

ps_results <- c(ps_m6, ps_m7)
print(ps_results)

# Table 3: columns 1-2 use Public Records as reference, 3-4 use GOTV
stargazer(
  list(m4, m5, m6, m7),
  title = "Effect of Experimental Conditions on Election Attitudes",
  column.labels = c("Records Access", "Confidence", "Records Access", "Confidence"),
  covariate.labels = c("Online Search Tool", "Denialism", "Constant"),
  omit = c("state"),
  omit.labels = c("State fixed effects"),
  omit.stat = c("rsq", "f"),
  digits = 2,
  df = FALSE,
  align = TRUE,
  no.space = TRUE
)
```


# Figure 1: Average Treatment Effects Across Experimental Conditions by Party and Race

```{r, warning = FALSE}

# Heterogeneous treatment effects by race and party, estimated without the
# GOTV ("voting") arm

# Race
pilot2 <- subset(pilot, !condition == "voting")
white <- subset(pilot2, Race == "White")
nw <- subset(pilot2, !Race == "White")

# Race (white versus non white)
m8 <- lm(record ~ condition + denial + as.factor(state), data = white)
m9 <- lm(record ~ condition + denial + as.factor(state), data = nw)
m10 <- lm(conf ~ condition + denial + as.factor(state), data = white)
m11 <- lm(conf ~ condition + denial + as.factor(state), data = nw)

# Appendix Table 11
stargazer(list(m8, m9, m10, m11),
          omit = c("state"),
          omit.labels = c("State fixed effects"),
          covariate.labels = c("Search Tool", "Public Records", "Denialism"),
          column.labels = c("Record Access (White)", "Record Access (Non-White)", "Election Confidence (White)", "Election Confidence (Non-White)"),
          omit.stat = c("rsq", "f"),
          df = FALSE,
          align = TRUE,
          digits = 2,
          no.space = TRUE,
          title = "Effect of Experimental Conditions on Election Attitudes: By Race",
          dep.var.caption = "Election Attitudes"
)

# PARTY

# Split on the 7-point party scale: values <= 3 vs values > 3.
# NOTE(review): party_7 == 4 falls in the "Lean Rep" subset, as may the
# mean-imputed party_7 values; confirm this split is intended.
democrat <- subset(pilot2, party_7 <= 3)
republican <- subset(pilot2, party_7 > 3)

# models
m12 <- lm(record ~ condition + denial + as.factor(state), data = democrat)
m13 <- lm(record ~ condition + denial + as.factor(state), data = republican)

m14 <- lm(conf ~ condition + denial + as.factor(state), data = democrat)
m15 <- lm(conf ~ condition + denial + as.factor(state), data = republican)

# Appendix Table 10
stargazer(list(m12, m13, m14, m15),
          omit = c("state"),
          omit.labels = c("State fixed effects"),
          covariate.labels = c("Search Tool", "Public Records", "Denialism", "Constant"),
          column.labels = c("Record Access (Lean Dem)", "Record Access (Lean Rep)", "Election Confidence (Lean Dem)", "Election Confidence (Lean Rep)"),
          omit.stat = c("rsq", "f"),
          df = FALSE,
          align = TRUE,
          digits = 2,
          no.space = TRUE,
          title = "Effect of Experimental Conditions on Election Attitudes: By Party",
          dep.var.caption = "Election Attitudes"
)

# Probability of Superiority.
# Each coefficient is standardised by the SD of its own model's outcome:
# m12/m13 are record models, m14/m15 are conf models.
ps_m12 <- get_ps(m12, "conditionaudit", democrat, "record")
ps_m13 <- get_ps(m13, "conditionaudit", republican, "record")
ps_m14 <- get_ps(m14, "conditionaudit", democrat, "conf")
ps_m15 <- get_ps(m15, "conditionaudit", republican, "conf")

ps_results <- c(ps_m12, ps_m13, ps_m14, ps_m15)
print(ps_results)

############################
# FIGURE 1
############################

# Coefficient plots with colors; only the two treatment coefficients named
# in `coefs` are requested
plot_race <- plot_summs(m8, m9, m10, m11, inner_ci_level = .9, colors = c("darkgreen", "darkorange", "green4", "orange3"),
                        coefs = c("Online Search Tool" = "conditionaudit", "Public Records" = "conditionpr"),
                        omit.coefs = c("conditionvoting", "denial", "as.factor(state)NC", "as.factor(state)VA", "as.factor(state)WI", "(Intercept)"),
                        model.names = c("Record Access (White)", "Record Access (Non-White)", "Confidence (White)", "Confidence (Non-White)")) +
  ggtitle("Race")

plot_party <- plot_summs(m12, m13, m14, m15, inner_ci_level = 0.9, colors = c("darkblue", "darkred", "blue", "red"),
                         coefs = c("Online Search Tool" = "conditionaudit", "Public Records" = "conditionpr"),
                         omit.coefs = c("conditionvoting", "denial", "as.factor(state)NC", "as.factor(state)VA", "as.factor(state)WI", "(Intercept)"),
                         model.names = c("Record Access (Lean Dem)", "Record Access (Lean Rep)", "Confidence (Lean Dem)", "Confidence (Lean Rep)")) +
  ggtitle("Party")


# include theme
plot_race <- plot_race + larger_text_theme
plot_party <- plot_party + larger_text_theme

# Combine race and party plots into a single figure (race on top)
ate_race_party <- grid.arrange(plot_race, plot_party, nrow = 2)
ate_race_party
#ggsave(filename = path_to_save, plot = ate_race_party, width = 10, height = 8, dpi = 300)

```



# APPENDIX B

# Table B.3: Descriptives
```{r}
# Descriptive statistics by experimental condition, zebra-striped layout.
# NOTE(review): the `Vote Choice 2024` column is created from
# vote_choice_2020 during cleaning; confirm the year in the label.
table1(
  ~ Sex + Age + Race + Education + `Party (7 point scale)` + `Voted 2020` +
    `Vote Choice 2024` | condition,
  data = pilot,
  topclass = "Rtable1-zebra"
)
# save as descriptives.png
```

# Table 3: Confidence at Personal, County, State and National level

```{r, warning = FALSE}
# One OLS model per confidence item (conf1-conf4), fitted on the full sample
# so that all three non-control conditions appear as coefficients
m1 <- lm(conf1 ~ condition + denial + as.factor(state), data = pilot)
m2 <- lm(conf2 ~ condition + denial + as.factor(state), data = pilot)
m3 <- lm(conf3 ~ condition + denial + as.factor(state), data = pilot)
m4 <- lm(conf4 ~ condition + denial + as.factor(state), data = pilot)

stargazer(
  list(m1, m2, m3, m4),
  title = "Effect of Experimental Conditions on Election Attitudes",
  dep.var.caption = "Election Attitudes",
  column.labels = c("Self", "County", "State", "National"),
  covariate.labels = c("Online Search tool", "Public Records", "GOTV", "Denialism"),
  omit = c("state"),
  omit.labels = c("State fixed effects"),
  omit.stat = c("rsq", "f"),
  digits = 2,
  df = FALSE,
  align = TRUE,
  no.space = TRUE
)

```

# Table 4 (Party) & Table 5 (Race)

```{r}
# Heterogeneous treatment effects, estimated without the GOTV ("voting") arm
pilot2 <- subset(pilot, condition != "voting")

# ---- Race: White versus non-White respondents ----
white <- subset(pilot2, Race == "White")
nw <- subset(pilot2, Race != "White")

m8 <- lm(record ~ condition + denial + as.factor(state), data = white)
m9 <- lm(record ~ condition + denial + as.factor(state), data = nw)
m10 <- lm(conf ~ condition + denial + as.factor(state), data = white)
m11 <- lm(conf ~ condition + denial + as.factor(state), data = nw)

# Appendix Table 11
stargazer(
  list(m8, m9, m10, m11),
  title = "Effect of Experimental Conditions on Election Attitudes: By Race",
  dep.var.caption = "Election Attitudes",
  column.labels = c("Record Access (White)", "Record Access (Non-White)", "Election Confidence (White)", "Election Confidence (Non-White)"),
  covariate.labels = c("Search Tool", "Public Records", "Denialism"),
  omit = c("state"),
  omit.labels = c("State fixed effects"),
  omit.stat = c("rsq", "f"),
  digits = 2,
  df = FALSE,
  align = TRUE,
  no.space = TRUE
)

# ---- Party: party_7 <= 3 versus party_7 > 3 ----
democrat <- subset(pilot2, party_7 <= 3)
republican <- subset(pilot2, party_7 > 3)

m12 <- lm(record ~ condition + denial + as.factor(state), data = democrat)
m13 <- lm(record ~ condition + denial + as.factor(state), data = republican)
m14 <- lm(conf ~ condition + denial + as.factor(state), data = democrat)
m15 <- lm(conf ~ condition + denial + as.factor(state), data = republican)

# Appendix Table 10
stargazer(
  list(m12, m13, m14, m15),
  title = "Effect of Experimental Conditions on Election Attitudes: By Party",
  dep.var.caption = "Election Attitudes",
  column.labels = c("Record Access (Lean Dem)", "Record Access (Lean Rep)", "Election Confidence (Lean Dem)", "Election Confidence (Lean Rep)"),
  covariate.labels = c("Search Tool", "Public Records", "Denialism", "Constant"),
  omit = c("state"),
  omit.labels = c("State fixed effects"),
  omit.stat = c("rsq", "f"),
  digits = 2,
  df = FALSE,
  align = TRUE,
  no.space = TRUE
)
```

# Table 6: Education
```{r, warning = FALSE}

# Split the full sample on the education code: > 4 versus < 5 (labelled
# "4 year degree" and "No 4 year degree" in the table)
educ_degree <- subset(pilot, Education > 4)
educ_no_degree <- subset(pilot, Education < 5)

# Record-access and confidence models within each education group
m1 <- lm(record ~ condition + denial + as.factor(state), data = educ_degree)
m2 <- lm(record ~ condition + denial + as.factor(state), data = educ_no_degree)
m3 <- lm(conf ~ condition + denial + as.factor(state), data = educ_degree)
m4 <- lm(conf ~ condition + denial + as.factor(state), data = educ_no_degree)

stargazer(
  list(m1, m2, m3, m4),
  title = "Effect of Experimental Conditions on Election Attitudes: By Education",
  dep.var.caption = "Election Attitudes",
  column.labels = c("Record Access (4 year degree)", "Record Access (No 4 year degree)", "Election Confidence (4 year)", "Election Confidence (No 4 year degree)"),
  covariate.labels = c("Online Search Tool", "Public Records", "GOTV", "Denialism"),
  omit = c("state"),
  omit.labels = c("State fixed effects"),
  omit.stat = c("rsq", "f"),
  digits = 2,
  df = FALSE,
  align = TRUE,
  no.space = TRUE
)

```

# Table 7: Interactions (Age and Voted 2020)

```{r, warning = FALSE}

# Pooled treatment (audit or pr vs control) interacted with self-reported
# 2020 turnout (m16, m17) and with age (m18, m19); the GOTV ("voting") arm
# is excluded from every model
m16 <- lm(record ~ condition_pooled * vote_2020 + denial + as.factor(state),
          data = subset(pilot, !condition == "voting"))

m17 <- lm(conf ~ condition_pooled * vote_2020 + denial + as.factor(state),
          data = subset(pilot, !condition == "voting"))

m18 <- lm(record ~ condition_pooled * Age + denial + as.factor(state),
          data = subset(pilot, !condition == "voting"))

m19 <- lm(conf ~ condition_pooled * Age + denial + as.factor(state),
          data = subset(pilot, !condition == "voting"))

# NOTE(review): covariate.labels are applied by position; check the rendered
# table to confirm each label sits on the intended coefficient, since the
# turnout and age models contain different terms.
stargazer(list(m16, m17, m18, m19),
          omit = c("state"),
          omit.labels = c("State fixed effects"),
          covariate.labels = c("Treatment", "Voted 2020", "Age", "Denialism", "Treatment x Voted 2020", "Treatment x Age"),
          column.labels = c("Record Access", "Confidence", "Record Access", "Confidence"),
          omit.stat = c("rsq", "f"),
          df = FALSE,
          align = TRUE,
          digits = 2,
          no.space = TRUE,
          title = "Interactive Effect of Treatment on Election Attitudes: Age and Voted in 2020 (Self Report)",
          dep.var.caption = "Election Attitudes"
)
```

# Table 8: State
```{r, warning = FALSE}

# Pooled treatment interacted with state, excluding the GOTV ("voting") arm.
# State enters through the interaction, so no state coefficients are omitted
# from the table.
m20 <- lm(record ~ condition_pooled * state + denial,
          data = subset(pilot, !condition == "voting"))
m21 <- lm(conf ~ condition_pooled * state + denial,
          data = subset(pilot, !condition == "voting"))

stargazer(list(m20, m21),
          covariate.labels = c("Treatment", "NC", "VA", "WI", "Denialism", "Treatment x NC",
                               "Treatment x VA", "Treatment x WI"),
          column.labels = c("Record Access", "Confidence"),
          omit.stat = c("rsq", "f"),
          df = FALSE,
          align = TRUE,
          digits = 2,
          no.space = TRUE,
          title = "Interactive Effect of Treatment on Election Attitudes: State",
          dep.var.caption = "Election Attitudes"
)


```


